{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Various torch packages\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "# torchvision\n",
    "from torchvision import datasets, transforms\n",
    "\n",
    "# ------------------------\n",
    "# get up one directory \n",
    "import sys, os\n",
    "sys.path.append(os.path.abspath('../'))\n",
    "# ------------------------\n",
    "\n",
    "# custom packages\n",
    "import models.aux_funs as maf\n",
    "import optimizers as op\n",
    "import regularizers as reg\n",
    "import train\n",
    "import math\n",
    "import utils.configuration as cf\n",
    "import utils.datasets as ud\n",
    "from models.mnist_conv import mnist_conv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fix the random seed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed for this run; the same value is also stored under 'random_seed' in conf_args.\n",
    "random_seed = 2\n",
    "# NOTE(review): cf.seed_torch presumably seeds the torch (and related) RNGs - confirm in utils.configuration.\n",
    "cf.seed_torch(random_seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configure the experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Keyword arguments for cf.Conf. Every key appears exactly once: a repeated key in a dict\n",
    "# literal is silently overridden by its last occurrence, which makes later edits error-prone.\n",
    "conf_args = {\n",
    "    # data specification\n",
    "    'data_file': \"../../Data\", 'train_split': 0.95, 'data_set': \"Fashion-MNIST\", 'download': True,\n",
    "    # cuda\n",
    "    'use_cuda': True, 'num_workers': 4, 'cuda_device': 0, 'pin_memory': True,\n",
    "    # number of training epochs\n",
    "    'epochs': 100,\n",
    "    # optimizer (see init_opt): lamda_0 is used for the conv-weight regularizer, lamda_1 for the\n",
    "    # linear-weight regularizer, beta is passed as momentum\n",
    "    'delta': 1.0, 'lr': 0.07, 'lamda_0': 0.4, 'lamda_1': 0.4, 'optim': \"LinBreg\", 'conv_group': True,\n",
    "    'beta': 0.0,\n",
    "    # initialization (see init_weights): sparse_init is forwarded to maf.sparsify_;\n",
    "    # r[0] is used for the bias init, r[1] for the default weight init, r[2] for the Conv2d weight init\n",
    "    'sparse_init': 0.01, 'r': [1, 1, 1],\n",
    "    # misc\n",
    "    'random_seed': random_seed, 'eval_acc': True,\n",
    "}\n",
    "\n",
    "conf = cf.Conf(**conf_args)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Initialize the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Constructor arguments shared by both networks below.\n",
    "# NOTE(review): data_set_mean / data_set_std are presumably the input normalisation statistics - confirm in cf.Conf.\n",
    "model_kwargs = dict(mean=conf.data_set_mean, std=conf.data_set_std)\n",
    "\n",
    "# Network that gets initialized and trained in the following cells.\n",
    "model = mnist_conv(**model_kwargs)\n",
    "# A second, separately constructed network on conf.device is handed to train.best_model.\n",
    "best_model = train.best_model(\n",
    "    mnist_conv(**model_kwargs).to(conf.device)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Weight initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_weights(conf, model):\n",
    "    \"\"\"Run the sparse initialization helpers on ``model`` and move it to ``conf.device``.\n",
    "\n",
    "    The ``maf`` helpers are called for their effect on ``model``. Their order is kept fixed:\n",
    "    they presumably draw random numbers, so reordering could change the result for a given\n",
    "    seed (TODO confirm in models.aux_funs).\n",
    "\n",
    "    Args:\n",
    "        conf: configuration providing ``r`` (indices 0, 1 and 2 are read), ``sparse_init``,\n",
    "            ``conv_group`` and ``device``.\n",
    "        model: network to initialize.\n",
    "\n",
    "    Returns:\n",
    "        The result of ``model.to(conf.device)``.\n",
    "    \"\"\"\n",
    "    # biases: once for the helper's default layer type, once for Conv2d layers,\n",
    "    # both called with the arguments (0, conf.r[0])\n",
    "    maf.sparse_bias_uniform_(model, 0, conf.r[0])\n",
    "    maf.sparse_bias_uniform_(model, 0, conf.r[0], ltype=nn.Conv2d)\n",
    "    # weights: conf.r[1] for the default layer type, conf.r[2] for Conv2d layers\n",
    "    maf.sparse_weight_normal_(model, conf.r[1])\n",
    "    maf.sparse_weight_normal_(model, conf.r[2], ltype=nn.Conv2d)\n",
    "    # sparsify the Conv2d layers only; conf.sparse_init is passed as the target level\n",
    "    # NOTE(review): presumably the fraction of kernels/weights kept non-zero - confirm\n",
    "    maf.sparsify_(model, conf.sparse_init, ltype=nn.Conv2d, conv_group=conf.conv_group)\n",
    "    model = model.to(conf.device)\n",
    "\n",
    "    return model\n",
    "\n",
    "model = init_weights(conf, model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Optimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def init_opt(conf, model):\n",
    "    \"\"\"Create the optimizer selected by ``conf.optim`` together with its LR scheduler.\n",
    "\n",
    "    Args:\n",
    "        conf: experiment configuration. Reads ``optim``, ``lr``, ``beta`` (used as\n",
    "            momentum), ``delta``, ``lamda_0``, ``lamda_1`` and ``conv_group``. For\n",
    "            ``optim == 'L1SGD'`` the attribute ``weight_reg`` is set on it as a side effect.\n",
    "        model: network whose parameters are optimized.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(opt, scheduler)``; ``scheduler`` is a ``ReduceLROnPlateau`` wrapping ``opt``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``conf.optim`` is none of 'SGD', 'LinBreg', 'ProxSGD', 'AdaBreg', 'L1SGD'.\n",
    "    \"\"\"\n",
    "    # -----------------------------------------------------------------------------------\n",
    "    # Get access to different model parameters\n",
    "    # -----------------------------------------------------------------------------------\n",
    "    weights_conv = maf.get_weights_conv(model)\n",
    "    weights_linear = maf.get_weights_linear(model)\n",
    "    biases = maf.get_bias(model)\n",
    "\n",
    "    # -----------------------------------------------------------------------------------\n",
    "    # Initialize optimizer\n",
    "    # -----------------------------------------------------------------------------------\n",
    "    # Regularizer applied to the convolutional weights; both variants are weighted by lamda_0.\n",
    "    if conf.conv_group:\n",
    "        reg2 = reg.reg_l1_l2_conv(lamda=conf.lamda_0)\n",
    "    else:\n",
    "        reg2 = reg.reg_l1(lamda=conf.lamda_0)\n",
    "\n",
    "    if conf.optim == \"SGD\":\n",
    "        opt = torch.optim.SGD(model.parameters(), lr=conf.lr, momentum=conf.beta)\n",
    "    elif conf.optim in (\"LinBreg\", \"ProxSGD\"):\n",
    "        # Both optimizers receive identical parameter groups; only the class differs.\n",
    "        opt_cls = op.LinBreg if conf.optim == \"LinBreg\" else op.ProxSGD\n",
    "        opt = opt_cls([\n",
    "            {'params': weights_conv, 'lr': conf.lr, 'reg': reg2,\n",
    "             'momentum': conf.beta, 'delta': conf.delta},\n",
    "            {'params': weights_linear, 'lr': conf.lr, 'reg': reg.reg_l1(lamda=conf.lamda_1),\n",
    "             'momentum': conf.beta, 'delta': conf.delta},\n",
    "            # biases carry no regularizer\n",
    "            {'params': biases, 'lr': conf.lr, 'momentum': conf.beta},\n",
    "        ])\n",
    "    elif conf.optim == \"AdaBreg\":\n",
    "        # AdaBreg groups are built without a momentum entry.\n",
    "        opt = op.AdaBreg([\n",
    "            {'params': weights_conv, 'lr': conf.lr, 'reg': reg2, 'delta': conf.delta},\n",
    "            {'params': weights_linear, 'lr': conf.lr, 'reg': reg.reg_l1(lamda=conf.lamda_1),\n",
    "             'delta': conf.delta},\n",
    "            {'params': biases, 'lr': conf.lr},\n",
    "        ])\n",
    "    elif conf.optim == \"L1SGD\":\n",
    "        def weight_reg(model):\n",
    "            \"\"\"Sum of the regularizer values of the four weight tensors listed below.\"\"\"\n",
    "            reg1 = reg.reg_l1(lamda=conf.lamda_1)\n",
    "\n",
    "            # NOTE(review): the hard-coded indices assume the layer layout of mnist_conv\n",
    "            # (layers2[0], layers2[2] get reg1; layers1[0], layers1[3] get reg2) - confirm.\n",
    "            loss1 = reg1(model.layers2[0].weight) + reg1(model.layers2[2].weight)\n",
    "            loss2 = reg2(model.layers1[0].weight) + reg2(model.layers1[3].weight)\n",
    "            return loss1 + loss2\n",
    "\n",
    "        # NOTE(review): presumably picked up by the training step as an extra loss term - confirm in train.\n",
    "        conf.weight_reg = weight_reg\n",
    "\n",
    "        # Learning rate and momentum must be read from conf; the bare names lr / beta are\n",
    "        # not defined in this scope and raised a NameError.\n",
    "        opt = torch.optim.SGD(model.parameters(), lr=conf.lr, momentum=conf.beta)\n",
    "    else:\n",
    "        raise ValueError(\"Unknown Optimizer specified\")\n",
    "\n",
    "    # learning rate scheduler\n",
    "    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.5, patience=5, threshold=0.01)\n",
    "\n",
    "    return opt, scheduler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loaders for the train / validation / test data, built from conf by ud.get_data_set.\n",
    "# NOTE(review): the train/validation split presumably follows conf.train_split - confirm in utils.datasets.\n",
    "train_loader, valid_loader, test_loader = ud.get_data_set(conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# History and Runs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up empty history containers.\n",
    "# NOTE(review): 'tracked' presumably names the quantities recorded per epoch - confirm against the training loop.\n",
    "tracked = ['loss', 'node_sparse']\n",
    "# two separate dicts: one for training, one for validation history\n",
    "train_hist, val_hist = {}, {}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 0\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.4905438596491228\n",
      "Train Loss: 599.9519957304001\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.6636666666666666\n",
      "Non-zero kernels: 0.010336538461538461\n",
      "Linear sparsity: 0.9874954666344294\n",
      "Overall sparsity: 0.5575243704305443\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [20.609855890274048, 1916.2779724121094, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 1\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.726859649122807\n",
      "Train Loss: 321.7138690650463\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.72\n",
      "Non-zero kernels: 0.010576923076923078\n",
      "Linear sparsity: 0.9845185565764023\n",
      "Overall sparsity: 0.5559631397238017\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [21.931122541427612, 1925.8936706542968, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 2\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.7573508771929824\n",
      "Train Loss: 283.2701005637646\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7493333333333333\n",
      "Non-zero kernels: 0.010817307692307692\n",
      "Linear sparsity: 0.9821460952611218\n",
      "Overall sparsity: 0.554740387219063\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [22.88484787940979, 1932.7815185546876, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 3\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.7731228070175439\n",
      "Train Loss: 264.6545532941818\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7506666666666667\n",
      "Non-zero kernels: 0.010817307692307692\n",
      "Linear sparsity: 0.9803629714700194\n",
      "Overall sparsity: 0.5537418765231519\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [23.61707067489624, 1938.436853027344, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 4\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.7843684210526316\n",
      "Train Loss: 251.59723964333534\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.738\n",
      "Non-zero kernels: 0.011057692307692308\n",
      "Linear sparsity: 0.9790558510638298\n",
      "Overall sparsity: 0.5531156918494449\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [24.143518447875977, 1943.5679443359377, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 5\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.7935087719298246\n",
      "Train Loss: 242.93793419003487\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.756\n",
      "Non-zero kernels: 0.012259615384615384\n",
      "Linear sparsity: 0.9781265111218569\n",
      "Overall sparsity: 0.553124153804495\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [25.272939682006836, 1948.768377685547, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 6\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.799719298245614\n",
      "Train Loss: 234.21597203612328\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7843333333333333\n",
      "Non-zero kernels: 0.0125\n",
      "Linear sparsity: 0.9772122823984526\n",
      "Overall sparsity: 0.5527179799620905\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [26.03877353668213, 1953.3736633300782, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 7\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8073508771929825\n",
      "Train Loss: 227.64893770217896\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7476666666666667\n",
      "Non-zero kernels: 0.013221153846153846\n",
      "Linear sparsity: 0.9759051619922631\n",
      "Overall sparsity: 0.5523033441646358\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [26.99472665786743, 1957.9061157226563, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 8\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8121578947368421\n",
      "Train Loss: 221.73580500483513\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7863333333333333\n",
      "Non-zero kernels: 0.013221153846153846\n",
      "Linear sparsity: 0.9750967117988395\n",
      "Overall sparsity: 0.5518506295694557\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [27.986443519592285, 1961.9242736816407, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 9\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8178421052631579\n",
      "Train Loss: 216.34336504340172\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7743333333333333\n",
      "Non-zero kernels: 0.013461538461538462\n",
      "Linear sparsity: 0.9736535904255319\n",
      "Overall sparsity: 0.5511482873002979\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [29.211545944213867, 1966.301416015625, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 10\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8230701754385965\n",
      "Train Loss: 210.54700848460197\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.7946666666666666\n",
      "Non-zero kernels: 0.014423076923076924\n",
      "Linear sparsity: 0.9717646881044487\n",
      "Overall sparsity: 0.5505136406715407\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [30.43342161178589, 1970.321826171875, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 11\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8286491228070175\n",
      "Train Loss: 203.57705023884773\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.805\n",
      "Non-zero kernels: 0.014423076923076924\n",
      "Linear sparsity: 0.9702460106382979\n",
      "Overall sparsity: 0.5496632141890062\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [31.629031658172607, 1974.3253845214845, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 12\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8329649122807018\n",
      "Train Loss: 198.45980513095856\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8133333333333334\n",
      "Non-zero kernels: 0.014423076923076924\n",
      "Linear sparsity: 0.968409997582205\n",
      "Overall sparsity: 0.5486350866504197\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [33.02216958999634, 1978.5940429687503, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 13\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8371929824561404\n",
      "Train Loss: 193.7299616187811\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8243333333333334\n",
      "Non-zero kernels: 0.015384615384615385\n",
      "Linear sparsity: 0.9668837645067698\n",
      "Overall sparsity: 0.5482035269428649\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [34.14260244369507, 1982.7187561035157, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 14\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.841859649122807\n",
      "Train Loss: 189.0045658648014\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.818\n",
      "Non-zero kernels: 0.015384615384615385\n",
      "Linear sparsity: 0.9651988636363636\n",
      "Overall sparsity: 0.5472600189547793\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [35.17772960662842, 1986.4052246093752, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 15\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8441929824561404\n",
      "Train Loss: 184.81027503311634\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8206666666666667\n",
      "Non-zero kernels: 0.015384615384615385\n",
      "Linear sparsity: 0.9633779617988395\n",
      "Overall sparsity: 0.5462403533712429\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [36.341172218322754, 1990.2953247070313, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 16\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8491929824561404\n",
      "Train Loss: 180.38021367788315\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8246666666666667\n",
      "Non-zero kernels: 0.015865384615384615\n",
      "Linear sparsity: 0.9614437258220503\n",
      "Overall sparsity: 0.5453687720010831\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [37.6322922706604, 1994.2618286132813, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 17\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8523684210526316\n",
      "Train Loss: 176.60947446525097\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.812\n",
      "Non-zero kernels: 0.015625\n",
      "Linear sparsity: 0.9597437137330754\n",
      "Overall sparsity: 0.5443110276198213\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [38.68380832672119, 1997.9598632812501, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 18\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8547719298245614\n",
      "Train Loss: 172.973073720932\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8233333333333334\n",
      "Non-zero kernels: 0.01610576923076923\n",
      "Linear sparsity: 0.9578548114119922\n",
      "Overall sparsity: 0.5434648321148118\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [39.994805335998535, 2002.0039184570312, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 19\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.857701754385965\n",
      "Train Loss: 169.46296548843384\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.819\n",
      "Non-zero kernels: 0.01658653846153846\n",
      "Linear sparsity: 0.9563739119922631\n",
      "Overall sparsity: 0.5428471093961549\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [41.14143753051758, 2005.568212890625, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 20\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8607719298245614\n",
      "Train Loss: 166.05749748647213\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8343333333333334\n",
      "Non-zero kernels: 0.016346153846153847\n",
      "Linear sparsity: 0.9544623428433269\n",
      "Overall sparsity: 0.5416708976441917\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [42.464152812957764, 2009.665728759766, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 21\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8647543859649123\n",
      "Train Loss: 162.60473188757896\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8386666666666667\n",
      "Non-zero kernels: 0.01658653846153846\n",
      "Linear sparsity: 0.9524374395551257\n",
      "Overall sparsity: 0.5406427701056052\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [43.58053398132324, 2013.4664489746096, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 22\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8654035087719298\n",
      "Train Loss: 159.97284810245037\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8283333333333334\n",
      "Non-zero kernels: 0.01658653846153846\n",
      "Linear sparsity: 0.9508129835589942\n",
      "Overall sparsity: 0.5397331099377201\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [44.54319190979004, 2017.2648620605469, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 23\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.869421052631579\n",
      "Train Loss: 157.09398755431175\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.836\n",
      "Non-zero kernels: 0.01658653846153846\n",
      "Linear sparsity: 0.949377417794971\n",
      "Overall sparsity: 0.538929224207961\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [45.56796932220459, 2021.0041137695312, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 24\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8714561403508772\n",
      "Train Loss: 153.97634340822697\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8206666666666667\n",
      "Non-zero kernels: 0.016346153846153847\n",
      "Linear sparsity: 0.9479720744680851\n",
      "Overall sparsity: 0.538036487950176\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [46.40139579772949, 2024.5768676757814, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 25\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8740526315789474\n",
      "Train Loss: 151.547552511096\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8406666666666667\n",
      "Non-zero kernels: 0.016826923076923076\n",
      "Linear sparsity: 0.9463098404255319\n",
      "Overall sparsity: 0.537317221770918\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [47.3722767829895, 2028.296276855469, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 26\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8749473684210526\n",
      "Train Loss: 149.14873805642128\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8453333333333334\n",
      "Non-zero kernels: 0.01706730769230769\n",
      "Linear sparsity: 0.9446627176015474\n",
      "Overall sparsity: 0.5365006431085838\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [48.390018939971924, 2032.2774841308594, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 27\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8776491228070176\n",
      "Train Loss: 146.66258372366428\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8233333333333334\n",
      "Non-zero kernels: 0.016826923076923076\n",
      "Linear sparsity: 0.9430231503868471\n",
      "Overall sparsity: 0.5354767465475223\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [49.22260856628418, 2035.9159912109376, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 28\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8784912280701754\n",
      "Train Loss: 144.7751125395298\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8446666666666667\n",
      "Non-zero kernels: 0.016826923076923076\n",
      "Linear sparsity: 0.941655585106383\n",
      "Overall sparsity: 0.5347109396154888\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [50.18672180175781, 2039.5538696289063, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 29\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8806491228070176\n",
      "Train Loss: 142.18728604912758\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.846\n",
      "Non-zero kernels: 0.01658653846153846\n",
      "Linear sparsity: 0.9401142408123792\n",
      "Overall sparsity: 0.5337420457622529\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [51.0415153503418, 2043.1389587402346, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 30\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8814035087719299\n",
      "Train Loss: 140.2355877906084\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8486666666666667\n",
      "Non-zero kernels: 0.01706730769230769\n",
      "Linear sparsity: 0.9389204545454546\n",
      "Overall sparsity: 0.5332851001895478\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [51.86433506011963, 2046.7233215332033, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 31\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8841228070175439\n",
      "Train Loss: 138.27073088288307\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.848\n",
      "Non-zero kernels: 0.016826923076923076\n",
      "Linear sparsity: 0.9373564434235977\n",
      "Overall sparsity: 0.5323035134037368\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [52.80537796020508, 2050.496472167969, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 32\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8860350877192983\n",
      "Train Loss: 136.0197576880455\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.854\n",
      "Non-zero kernels: 0.01730769230769231\n",
      "Linear sparsity: 0.9360039893617021\n",
      "Overall sparsity: 0.5317577173030057\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [54.02199840545654, 2054.380682373047, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 33\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8875263157894737\n",
      "Train Loss: 134.62643042206764\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8413333333333334\n",
      "Non-zero kernels: 0.017548076923076923\n",
      "Linear sparsity: 0.9345608679883946\n",
      "Overall sparsity: 0.5310553750338478\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [55.034624099731445, 2058.319262695313, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 34\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8888771929824562\n",
      "Train Loss: 132.67596624791622\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.845\n",
      "Non-zero kernels: 0.017548076923076923\n",
      "Linear sparsity: 0.9330119680851063\n",
      "Overall sparsity: 0.5301880246412131\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [56.126346588134766, 2062.446423339844, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 35\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8896140350877193\n",
      "Train Loss: 131.12491908669472\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8526666666666667\n",
      "Non-zero kernels: 0.01730769230769231\n",
      "Linear sparsity: 0.9319919608317214\n",
      "Overall sparsity: 0.5295110682372055\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [57.16599464416504, 2066.3141906738283, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 36\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.892140350877193\n",
      "Train Loss: 128.50546145439148\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8543333333333333\n",
      "Non-zero kernels: 0.017548076923076923\n",
      "Linear sparsity: 0.9307075072533849\n",
      "Overall sparsity: 0.5288975764960736\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [58.064680099487305, 2070.127130126953, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 37\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8939473684210526\n",
      "Train Loss: 127.16229742765427\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8403333333333334\n",
      "Non-zero kernels: 0.017548076923076923\n",
      "Linear sparsity: 0.929619499516441\n",
      "Overall sparsity: 0.5282883157324668\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [59.152228355407715, 2074.1364807128907, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 38\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8947894736842106\n",
      "Train Loss: 125.68659114837646\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8646666666666667\n",
      "Non-zero kernels: 0.017788461538461538\n",
      "Linear sparsity: 0.9283652683752418\n",
      "Overall sparsity: 0.5276917479014351\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [59.90653896331787, 2078.053521728516, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 39\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8963508771929825\n",
      "Train Loss: 123.95455355197191\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8653333333333333\n",
      "Non-zero kernels: 0.017788461538461538\n",
      "Linear sparsity: 0.9275114845261122\n",
      "Overall sparsity: 0.5272136474411048\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [60.86151313781738, 2081.948779296875, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 40\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8967894736842106\n",
      "Train Loss: 122.91564971208572\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.853\n",
      "Non-zero kernels: 0.017788461538461538\n",
      "Linear sparsity: 0.9263932543520309\n",
      "Overall sparsity: 0.5265874627673978\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [61.84208869934082, 2085.9619079589847, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 41\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8969298245614035\n",
      "Train Loss: 121.78862115740776\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.863\n",
      "Non-zero kernels: 0.017788461538461538\n",
      "Linear sparsity: 0.925055911508704\n",
      "Overall sparsity: 0.5258385797454644\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [62.59378719329834, 2089.9864501953125, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 42\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8991929824561403\n",
      "Train Loss: 119.87843751907349\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8563333333333333\n",
      "Non-zero kernels: 0.018028846153846152\n",
      "Linear sparsity: 0.9240283486460348\n",
      "Overall sparsity: 0.5253689412401842\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [63.4525671005249, 2093.570544433594, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 43\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8990175438596492\n",
      "Train Loss: 119.24771866202354\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.856\n",
      "Non-zero kernels: 0.01826923076923077\n",
      "Linear sparsity: 0.9228647848162476\n",
      "Overall sparsity: 0.524823145139453\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [64.44304084777832, 2097.6353515625, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 44\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.8999473684210526\n",
      "Train Loss: 117.23268580436707\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.862\n",
      "Non-zero kernels: 0.018509615384615385\n",
      "Linear sparsity: 0.9218598887814313\n",
      "Overall sparsity: 0.5243661995667479\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [65.56240749359131, 2101.753869628906, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 45\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9015964912280702\n",
      "Train Loss: 115.90008194744587\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.866\n",
      "Non-zero kernels: 0.01875\n",
      "Linear sparsity: 0.9205905464216635\n",
      "Overall sparsity: 0.5237611697806661\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [66.49563598632812, 2106.0099609375, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 46\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9028421052631579\n",
      "Train Loss: 114.85915347933769\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.864\n",
      "Non-zero kernels: 0.01875\n",
      "Linear sparsity: 0.9196536508704062\n",
      "Overall sparsity: 0.5232365285675602\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [67.6556282043457, 2109.844110107422, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 47\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9050175438596492\n",
      "Train Loss: 112.96016170829535\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8696666666666667\n",
      "Non-zero kernels: 0.019230769230769232\n",
      "Linear sparsity: 0.9185278650870407\n",
      "Overall sparsity: 0.5228176617925806\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [68.82407283782959, 2113.5478637695314, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 48\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9059122807017543\n",
      "Train Loss: 111.99749346077442\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8636666666666667\n",
      "Non-zero kernels: 0.019230769230769232\n",
      "Linear sparsity: 0.9172056334622823\n",
      "Overall sparsity: 0.5220772407256973\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [69.82349395751953, 2117.557830810547, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 49\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9061228070175439\n",
      "Train Loss: 110.07643392682076\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8656666666666667\n",
      "Non-zero kernels: 0.01971153846153846\n",
      "Linear sparsity: 0.9165709622823984\n",
      "Overall sparsity: 0.5219333874898456\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [70.90884590148926, 2121.7400024414064, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 50\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9077543859649123\n",
      "Train Loss: 109.18104350566864\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8523333333333334\n",
      "Non-zero kernels: 0.020192307692307693\n",
      "Linear sparsity: 0.9153696204061895\n",
      "Overall sparsity: 0.5214722109396155\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [72.0524034500122, 2125.7885620117186, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 51\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9090175438596492\n",
      "Train Loss: 107.98168586194515\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.87\n",
      "Non-zero kernels: 0.020913461538461537\n",
      "Linear sparsity: 0.9146971711798839\n",
      "Overall sparsity: 0.5214129772542648\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [73.15806770324707, 2129.754150390625, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 52\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.910578947368421\n",
      "Train Loss: 106.44281203299761\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.871\n",
      "Non-zero kernels: 0.020432692307692308\n",
      "Linear sparsity: 0.9136922751450677\n",
      "Overall sparsity: 0.5206387083671812\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [74.2904167175293, 2133.390625, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 53\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9101228070175439\n",
      "Train Loss: 105.15661072731018\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8706666666666667\n",
      "Non-zero kernels: 0.021153846153846155\n",
      "Linear sparsity: 0.9127629352030948\n",
      "Overall sparsity: 0.5204356214459789\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [75.56821632385254, 2137.439245605469, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 54\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9111578947368421\n",
      "Train Loss: 104.93834330886602\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.871\n",
      "Non-zero kernels: 0.020913461538461537\n",
      "Linear sparsity: 0.9117655947775629\n",
      "Overall sparsity: 0.5197713579745464\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [76.31024074554443, 2141.0327270507814, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 55\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9134736842105263\n",
      "Train Loss: 102.45694842934608\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.867\n",
      "Non-zero kernels: 0.020913461538461537\n",
      "Linear sparsity: 0.9108513660541586\n",
      "Overall sparsity: 0.5192594096940157\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [77.48171615600586, 2145.123980712891, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 56\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9142280701754386\n",
      "Train Loss: 101.58735100924969\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8446666666666667\n",
      "Non-zero kernels: 0.02139423076923077\n",
      "Linear sparsity: 0.9099824709864603\n",
      "Overall sparsity: 0.5189843961548877\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [78.73525619506836, 2149.301538085938, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 57\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.913719298245614\n",
      "Train Loss: 100.84997541457415\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.865\n",
      "Non-zero kernels: 0.02139423076923077\n",
      "Linear sparsity: 0.9089322412959381\n",
      "Overall sparsity: 0.518396290278906\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [79.65910720825195, 2153.059735107422, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 58\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9159649122807018\n",
      "Train Loss: 99.04482144117355\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.866\n",
      "Non-zero kernels: 0.021634615384615384\n",
      "Linear sparsity: 0.9079953457446809\n",
      "Overall sparsity: 0.5179774235039264\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [80.98019313812256, 2157.037976074219, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 59\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9167017543859649\n",
      "Train Loss: 98.37669304013252\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8686666666666667\n",
      "Non-zero kernels: 0.02139423076923077\n",
      "Linear sparsity: 0.9071868955512572\n",
      "Overall sparsity: 0.51741893447062\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [82.10839653015137, 2161.017218017578, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 60\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9167368421052632\n",
      "Train Loss: 97.42083543539047\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.86\n",
      "Non-zero kernels: 0.021634615384615384\n",
      "Linear sparsity: 0.906272666827853\n",
      "Overall sparsity: 0.5170127606282156\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [82.92450904846191, 2165.1751586914065, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 61\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9180175438596492\n",
      "Train Loss: 96.61734464764595\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8666666666666667\n",
      "Non-zero kernels: 0.021875\n",
      "Linear sparsity: 0.9051393254352031\n",
      "Overall sparsity: 0.5164838884375846\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [83.83632946014404, 2169.279895019531, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 62\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.919578947368421\n",
      "Train Loss: 94.99768266826868\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.869\n",
      "Non-zero kernels: 0.022115384615384617\n",
      "Linear sparsity: 0.9043535420696325\n",
      "Overall sparsity: 0.5161496412131059\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [85.03223419189453, 2173.324169921875, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 63\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9204912280701755\n",
      "Train Loss: 94.48133793473244\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8523333333333334\n",
      "Non-zero kernels: 0.021875\n",
      "Linear sparsity: 0.9033033123791102\n",
      "Overall sparsity: 0.5154557608989981\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [86.18335151672363, 2177.401745605469, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 64\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9208947368421052\n",
      "Train Loss: 92.8945726081729\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.869\n",
      "Non-zero kernels: 0.021875\n",
      "Linear sparsity: 0.9025553070599613\n",
      "Overall sparsity: 0.5150368941240184\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [87.11040496826172, 2181.5280761718755, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 65\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9215438596491228\n",
      "Train Loss: 92.0372574403882\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.869\n",
      "Non-zero kernels: 0.022115384615384617\n",
      "Linear sparsity: 0.9015730778529981\n",
      "Overall sparsity: 0.5145926414838884\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [88.09263801574707, 2185.7546875, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 66\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9226666666666666\n",
      "Train Loss: 91.16710320860147\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8763333333333333\n",
      "Non-zero kernels: 0.022596153846153846\n",
      "Linear sparsity: 0.9009157398452611\n",
      "Overall sparsity: 0.5144360953154616\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [89.01589107513428, 2190.2968139648438, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 67\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9238947368421052\n",
      "Train Loss: 89.77213114500046\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.852\n",
      "Non-zero kernels: 0.02235576923076923\n",
      "Linear sparsity: 0.9000392891682786\n",
      "Overall sparsity: 0.51383952748443\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [90.01452255249023, 2194.3781799316407, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 68\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9231929824561403\n",
      "Train Loss: 89.75256142765284\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.858\n",
      "Non-zero kernels: 0.02283653846153846\n",
      "Linear sparsity: 0.8988379472920697\n",
      "Overall sparsity: 0.5133783509341998\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [90.98226165771484, 2198.4940551757813, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 69\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9254736842105263\n",
      "Train Loss: 87.90081233531237\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.842\n",
      "Non-zero kernels: 0.02283653846153846\n",
      "Linear sparsity: 0.8980672751450677\n",
      "Overall sparsity: 0.512946791226645\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [92.02505779266357, 2202.870141601563, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 70\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9259298245614035\n",
      "Train Loss: 87.16405177861452\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8376666666666667\n",
      "Non-zero kernels: 0.02283653846153846\n",
      "Linear sparsity: 0.8972361581237911\n",
      "Overall sparsity: 0.5124813836988897\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [93.12923431396484, 2207.5142211914062, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 71\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9263333333333333\n",
      "Train Loss: 86.03985262662172\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.848\n",
      "Non-zero kernels: 0.02283653846153846\n",
      "Linear sparsity: 0.8963143737911026\n",
      "Overall sparsity: 0.511965204440834\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [94.00269889831543, 2211.7072998046874, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 72\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9281403508771929\n",
      "Train Loss: 85.70569814741611\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8796666666666667\n",
      "Non-zero kernels: 0.023076923076923078\n",
      "Linear sparsity: 0.8954228118955513\n",
      "Overall sparsity: 0.5115717235310046\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [94.99760723114014, 2216.0568725585936, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 73\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9275263157894736\n",
      "Train Loss: 84.46326706558466\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.869\n",
      "Non-zero kernels: 0.023076923076923078\n",
      "Linear sparsity: 0.894327248549323\n",
      "Overall sparsity: 0.5109582317898728\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [95.88998985290527, 2220.159692382813, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 74\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9292807017543859\n",
      "Train Loss: 83.57542058825493\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8753333333333333\n",
      "Non-zero kernels: 0.023076923076923078\n",
      "Linear sparsity: 0.8938436895551257\n",
      "Overall sparsity: 0.5106874492282697\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [96.94554328918457, 2224.6552856445314, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 75\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9299649122807018\n",
      "Train Loss: 82.086973965168\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.865\n",
      "Non-zero kernels: 0.02283653846153846\n",
      "Linear sparsity: 0.8925970140232108\n",
      "Overall sparsity: 0.5098835634985107\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [97.67413711547852, 2228.863354492187, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 76\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9307543859649123\n",
      "Train Loss: 82.1105827242136\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.88\n",
      "Non-zero kernels: 0.023076923076923078\n",
      "Linear sparsity: 0.8915467843326886\n",
      "Overall sparsity: 0.5094012320606552\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [98.66822624206543, 2233.1810546875004, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 77\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9311929824561404\n",
      "Train Loss: 80.38935200124979\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8376666666666667\n",
      "Non-zero kernels: 0.023076923076923078\n",
      "Linear sparsity: 0.8905721107350096\n",
      "Overall sparsity: 0.5088554359599242\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [99.73928833007812, 2237.801617431641, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 78\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9325438596491228\n",
      "Train Loss: 79.7449304908514\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8706666666666667\n",
      "Non-zero kernels: 0.023557692307692307\n",
      "Linear sparsity: 0.8899827732108317\n",
      "Overall sparsity: 0.5087369685892229\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [100.84839534759521, 2242.460223388672, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 79\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9338771929824562\n",
      "Train Loss: 78.97205368056893\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8646666666666667\n",
      "Non-zero kernels: 0.02403846153846154\n",
      "Linear sparsity: 0.8892725459381045\n",
      "Overall sparsity: 0.5085508055781208\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [101.60745811462402, 2247.0472961425785, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 80\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9339824561403509\n",
      "Train Loss: 78.05783534049988\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8783333333333333\n",
      "Non-zero kernels: 0.023798076923076922\n",
      "Linear sparsity: 0.8883658728239845\n",
      "Overall sparsity: 0.5079373138369889\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [102.7930965423584, 2251.454296875, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 81\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9342105263157895\n",
      "Train Loss: 77.62332136183977\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.862\n",
      "Non-zero kernels: 0.023798076923076922\n",
      "Linear sparsity: 0.8873760880077369\n",
      "Overall sparsity: 0.5073830557812077\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [103.75831317901611, 2255.8247619628905, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 82\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9352280701754386\n",
      "Train Loss: 76.56582607328892\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8713333333333333\n",
      "Non-zero kernels: 0.023798076923076922\n",
      "Linear sparsity: 0.886333413926499\n",
      "Overall sparsity: 0.5067991808827511\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [104.6218900680542, 2260.074664306641, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 83\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9364210526315789\n",
      "Train Loss: 75.53524789959192\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8603333333333333\n",
      "Non-zero kernels: 0.023798076923076922\n",
      "Linear sparsity: 0.8855627417794971\n",
      "Overall sparsity: 0.5063676211751963\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [105.36591053009033, 2264.494110107422, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 84\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9363684210526316\n",
      "Train Loss: 75.01620749756694\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8613333333333333\n",
      "Non-zero kernels: 0.02403846153846154\n",
      "Linear sparsity: 0.8846711798839458\n",
      "Overall sparsity: 0.5059741402653669\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [106.38819694519043, 2269.160546875, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 85\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9371929824561404\n",
      "Train Loss: 74.55274821072817\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8723333333333333\n",
      "Non-zero kernels: 0.024278846153846154\n",
      "Linear sparsity: 0.8837720623791102\n",
      "Overall sparsity: 0.5055764283780124\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [107.35175609588623, 2273.6634887695313, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 86\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9385614035087719\n",
      "Train Loss: 73.53810742497444\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8413333333333334\n",
      "Non-zero kernels: 0.023798076923076922\n",
      "Linear sparsity: 0.883061835106383\n",
      "Overall sparsity: 0.5049671676144056\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [108.36159801483154, 2278.155615234375, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 87\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9373508771929825\n",
      "Train Loss: 72.77530238777399\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8743333333333333\n",
      "Non-zero kernels: 0.02451923076923077\n",
      "Linear sparsity: 0.8822382736943907\n",
      "Overall sparsity: 0.504823314378554\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [109.3875904083252, 2282.809924316406, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 88\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9397894736842105\n",
      "Train Loss: 71.74142197147012\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8706666666666667\n",
      "Non-zero kernels: 0.024278846153846154\n",
      "Linear sparsity: 0.8814827127659575\n",
      "Overall sparsity: 0.5042944421879231\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [110.15070152282715, 2287.117041015625, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 89\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9405263157894737\n",
      "Train Loss: 70.19949121028185\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.869\n",
      "Non-zero kernels: 0.024759615384615383\n",
      "Linear sparsity: 0.8808329303675049\n",
      "Overall sparsity: 0.5041421269970214\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [111.15300941467285, 2291.9157104492188, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 90\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9413333333333334\n",
      "Train Loss: 69.8767657354474\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8776666666666667\n",
      "Non-zero kernels: 0.025240384615384616\n",
      "Linear sparsity: 0.8801831479690522\n",
      "Overall sparsity: 0.5039898118061197\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [112.18622589111328, 2296.3934692382813, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 91\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9406491228070175\n",
      "Train Loss: 69.7620655298233\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8716666666666667\n",
      "Non-zero kernels: 0.024759615384615383\n",
      "Linear sparsity: 0.8795711436170213\n",
      "Overall sparsity: 0.5034355537503384\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [113.04442977905273, 2301.356964111328, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 92\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9420350877192982\n",
      "Train Loss: 68.67929100990295\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8773333333333333\n",
      "Non-zero kernels: 0.025\n",
      "Linear sparsity: 0.8789138056092843\n",
      "Overall sparsity: 0.5031732331437856\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [113.91798686981201, 2305.98740234375, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 93\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9424912280701755\n",
      "Train Loss: 67.5185741558671\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8793333333333333\n",
      "Non-zero kernels: 0.025240384615384616\n",
      "Linear sparsity: 0.8780297993230174\n",
      "Overall sparsity: 0.5027839832114812\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [115.05155086517334, 2310.6888000488284, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 94\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9426491228070175\n",
      "Train Loss: 66.94312888756394\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8733333333333333\n",
      "Non-zero kernels: 0.025240384615384616\n",
      "Linear sparsity: 0.8771533486460348\n",
      "Overall sparsity: 0.5022931898185757\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [115.888671875, 2315.5059814453125, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 95\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9437543859649122\n",
      "Train Loss: 66.8992928378284\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.842\n",
      "Non-zero kernels: 0.024759615384615383\n",
      "Linear sparsity: 0.8761862306576402\n",
      "Overall sparsity: 0.5015400758191173\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [116.71436214447021, 2320.1056884765626, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 96\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9445614035087719\n",
      "Train Loss: 65.27483919262886\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.875\n",
      "Non-zero kernels: 0.025240384615384616\n",
      "Linear sparsity: 0.8756573380077369\n",
      "Overall sparsity: 0.5014554562686163\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [117.59396266937256, 2324.7059692382813, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 97\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9445614035087719\n",
      "Train Loss: 64.81200005486608\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8736666666666667\n",
      "Non-zero kernels: 0.02548076923076923\n",
      "Linear sparsity: 0.8752795575435203\n",
      "Overall sparsity: 0.5013496818304901\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [118.69038772583008, 2329.557196044922, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 98\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9460175438596491\n",
      "Train Loss: 64.09673370793462\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.874\n",
      "Non-zero kernels: 0.02548076923076923\n",
      "Linear sparsity: 0.8745391078336557\n",
      "Overall sparsity: 0.5009350460330355\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [119.43184566497803, 2333.8711853027344, 0.0]\n",
      "Learning rate: 0.07\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "||||||||||||||||||||||||||||||||||||||||||||||||||\n",
      "<><><><><><><><><><><><><><><><><><><><><><><><><>\n",
      "Epoch: 99\n",
      "--------------------------------------------------\n",
      "Train Accuracy: 0.9452982456140351\n",
      "Train Loss: 63.7171530649066\n",
      "--------------------------------------------------\n",
      "Validation Accuracy: 0.8756666666666667\n",
      "Non-zero kernels: 0.025721153846153845\n",
      "Linear sparsity: 0.8737911025145068\n",
      "Overall sparsity: 0.500621953696182\n",
      "Node sparsity: [1.0, 1.0]\n",
      "Regularization values per group: [120.59517097473145, 2338.536932373047, 0.0]\n",
      "Learning rate: 0.07\n"
     ]
    }
   ],
   "source": [
    "# -----------------------------------------------------------------------------------\n",
    "# Reinit weights and the corresponding optimizer\n",
    "# -----------------------------------------------------------------------------------\n",
    "model = init_weights(conf, model)\n",
    "opt, scheduler = init_opt(conf, model)\n",
    "\n",
    "# -----------------------------------------------------------------------------------\n",
    "# main loop: one training step and one validation step per epoch\n",
    "# -----------------------------------------------------------------------------------\n",
    "for epoch in range(conf.epochs):\n",
    "    # epoch banner: three separator lines followed by the epoch index\n",
    "    print(25*\"<>\", 50*\"|\", 25*\"<>\", sep=\"\\n\")\n",
    "    print('Epoch:', epoch)\n",
    "\n",
    "    # ------------------------------------------------------------------------\n",
    "    # training step; every tracked key it reports is appended to train_hist\n",
    "    # ------------------------------------------------------------------------\n",
    "    train_data = train.train_step(conf, model, opt, train_loader)\n",
    "    for name in tracked:\n",
    "        if name in train_data:\n",
    "            train_hist.setdefault(name, []).append(train_data[name])\n",
    "\n",
    "    # ------------------------------------------------------------------------\n",
    "    # validation step; every tracked key it reports is appended to val_hist.\n",
    "    # A list-valued entry is split into one history series per element,\n",
    "    # stored under the key \"<name>_<index>\".\n",
    "    # ------------------------------------------------------------------------\n",
    "    val_data = train.validation_step(conf, model, opt, valid_loader)\n",
    "    for name in tracked:\n",
    "        if name not in val_data:\n",
    "            continue\n",
    "        value = val_data[name]\n",
    "        if isinstance(value, list):\n",
    "            for idx, item in enumerate(value):\n",
    "                val_hist.setdefault(name + \"_\" + str(idx), []).append(item)\n",
    "        else:\n",
    "            val_hist.setdefault(name, []).append(value)\n",
    "\n",
    "    # the scheduler is stepped with this epoch's training loss, then the\n",
    "    # learning rate of the first parameter group is reported\n",
    "    scheduler.step(train_data['loss'])\n",
    "    print(\"Learning rate:\",opt.param_groups[0]['lr'])\n",
    "\n",
    "    # hand this epoch's train / validation accuracy and the model to best_model\n",
    "    best_model(train_data['acc'], val_data['acc'], model=model)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "vscode": {
   "interpreter": {
    "hash": "07cecbd58096559357ff47686066ecdb82e32493890ce712e45aa4f3fdd673c4"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
